# Fetch the page source with the requests library.
# Extract the fields of interest from it with the re module.
import  requests
import  re
import csv
# Address of the listing page to download.
url = "https://movie.douban.com/top250"
# Send a browser-style User-Agent header with the request.
header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0"
}
# requests waits indefinitely unless a timeout is given, so bound the wait
# to keep a stalled connection from hanging the script.
resp = requests.get(url, headers=header, timeout=10)
# Stop on an HTTP error status instead of parsing an error page and
# silently producing an empty CSV.
resp.raise_for_status()
page_content = resp.text

# Parse the data: each match exposes the named groups "name", "year",
# "pf" and "number" taken from the page markup.
_ITEM_PATTERN = (
    r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>'
    r'.*?<div class="bd">.*?<br>(?P<year>.*?)&nbsp.*?'
    r'<span class="rating_num" property="v:average">(?P<pf>.*?)</span>.*?<span>'
    r'(?P<number>.*?)人评价'
)
# DOTALL lets "." match newlines, so the lazy ".*?" gaps can span lines.
obj = re.compile(_ITEM_PATTERN, flags=re.DOTALL)
# Start matching: lazily iterate over every pattern match in the page.
result = obj.finditer(page_content)
# The context manager closes the file even if a row fails mid-loop;
# newline="" is what the csv module expects for files it writes to.
with open("data.csv", mode="w", encoding="utf-8", newline="") as csv_file:
    csv_writer = csv.writer(csv_file)
    for match in result:
        fields = match.groupdict()
        # Drop any whitespace captured around the year.
        fields["year"] = fields["year"].strip()
        print(fields)
        # Values are written in the order of the named groups in the pattern.
        csv_writer.writerow(fields.values())
print("over")
